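# Goal
# Iterate over the selected <li> nodes (class "item-1") and get the markup of the <a> tag inside each one as a string
# Steps
# Select the <a> element, then serialize it to a string with etree.tostring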

from lxml import etree
# Sample HTML fragment used as the parser input below.
# NOTE: the last <li> (class "item-0") has no closing </li> tag in this sample.
text = ''' <div> <ul> 
        <li class="item-1"><a>first item</a></li> 
        <li class="item-1"><a href="link2.html">second item</a></li> 
        <li class="item-inactive"><a href="link3.html">third item</a></li> 
        <li class="item-1"><a href="link4.html">fourth item</a></li> 
        <li class="item-0"><a href="link5.html">fifth item</a> 
        </ul> </div> '''

# Group by <li> element: parse the markup and select every <li> whose class
# attribute is exactly "item-1".
html = etree.HTML(text)
li_list = html.xpath("//li[@class='item-1']")

# Within each group, extract the first direct <a> child and print its markup.
for li in li_list:
    anchors = li.xpath('./a')
    if not anchors:
        # No direct <a> child: nothing to print. Indexing [0] on the empty
        # XPath result would raise IndexError and abort the whole loop.
        continue
    # encoding='utf-8' makes the serialized bytes match the decode() below, so
    # non-ASCII text is printed as real characters rather than the numeric
    # character references produced by the default ASCII serialization.
    # with_tail=False keeps any text that follows </a> out of the output.
    astr = etree.tostring(anchors[0], encoding='utf-8', with_tail=False)
    print(astr.decode('utf-8'))
